{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true,
    "ExecuteTime": {
     "end_time": "2025-08-25T07:18:10.223707800Z",
     "start_time": "2025-08-25T07:17:47.556643500Z"
    }
   },
   "outputs": [
    {
     "ename": "ImportError",
     "evalue": "Missing optional dependency 'openpyxl'.  Use pip or conda to install openpyxl.",
     "output_type": "error",
     "traceback": [
      "\u001B[1;31m---------------------------------------------------------------------------\u001B[0m",
      "\u001B[1;31mModuleNotFoundError\u001B[0m                       Traceback (most recent call last)",
      "File \u001B[1;32mD:\\PythonStudy\\my-pandas\\venv\\lib\\site-packages\\pandas\\compat\\_optional.py:135\u001B[0m, in \u001B[0;36mimport_optional_dependency\u001B[1;34m(name, extra, errors, min_version)\u001B[0m\n\u001B[0;32m    134\u001B[0m \u001B[38;5;28;01mtry\u001B[39;00m:\n\u001B[1;32m--> 135\u001B[0m     module \u001B[38;5;241m=\u001B[39m \u001B[43mimportlib\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mimport_module\u001B[49m\u001B[43m(\u001B[49m\u001B[43mname\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m    136\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m \u001B[38;5;167;01mImportError\u001B[39;00m:\n",
      "File \u001B[1;32m~\\AppData\\Local\\Programs\\Python\\Python39\\lib\\importlib\\__init__.py:127\u001B[0m, in \u001B[0;36mimport_module\u001B[1;34m(name, package)\u001B[0m\n\u001B[0;32m    126\u001B[0m         level \u001B[38;5;241m+\u001B[39m\u001B[38;5;241m=\u001B[39m \u001B[38;5;241m1\u001B[39m\n\u001B[1;32m--> 127\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[43m_bootstrap\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_gcd_import\u001B[49m\u001B[43m(\u001B[49m\u001B[43mname\u001B[49m\u001B[43m[\u001B[49m\u001B[43mlevel\u001B[49m\u001B[43m:\u001B[49m\u001B[43m]\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mpackage\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mlevel\u001B[49m\u001B[43m)\u001B[49m\n",
      "File \u001B[1;32m<frozen importlib._bootstrap>:1030\u001B[0m, in \u001B[0;36m_gcd_import\u001B[1;34m(name, package, level)\u001B[0m\n",
      "File \u001B[1;32m<frozen importlib._bootstrap>:1007\u001B[0m, in \u001B[0;36m_find_and_load\u001B[1;34m(name, import_)\u001B[0m\n",
      "File \u001B[1;32m<frozen importlib._bootstrap>:984\u001B[0m, in \u001B[0;36m_find_and_load_unlocked\u001B[1;34m(name, import_)\u001B[0m\n",
      "\u001B[1;31mModuleNotFoundError\u001B[0m: No module named 'openpyxl'",
      "\nDuring handling of the above exception, another exception occurred:\n",
      "\u001B[1;31mImportError\u001B[0m                               Traceback (most recent call last)",
      "Cell \u001B[1;32mIn[3], line 3\u001B[0m\n\u001B[0;32m      1\u001B[0m \u001B[38;5;28;01mimport\u001B[39;00m\u001B[38;5;250m \u001B[39m\u001B[38;5;21;01mpandas\u001B[39;00m\u001B[38;5;250m \u001B[39m\u001B[38;5;28;01mas\u001B[39;00m\u001B[38;5;250m \u001B[39m\u001B[38;5;21;01mpd\u001B[39;00m\n\u001B[1;32m----> 3\u001B[0m df \u001B[38;5;241m=\u001B[39m \u001B[43mpd\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mread_excel\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[38;5;124;43mstudent_scores.xlsx\u001B[39;49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[43m)\u001B[49m\n\u001B[0;32m      4\u001B[0m \u001B[38;5;28mprint\u001B[39m(df)\n",
      "File \u001B[1;32mD:\\PythonStudy\\my-pandas\\venv\\lib\\site-packages\\pandas\\io\\excel\\_base.py:495\u001B[0m, in \u001B[0;36mread_excel\u001B[1;34m(io, sheet_name, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, parse_dates, date_parser, date_format, thousands, decimal, comment, skipfooter, storage_options, dtype_backend, engine_kwargs)\u001B[0m\n\u001B[0;32m    493\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;28misinstance\u001B[39m(io, ExcelFile):\n\u001B[0;32m    494\u001B[0m     should_close \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mTrue\u001B[39;00m\n\u001B[1;32m--> 495\u001B[0m     io \u001B[38;5;241m=\u001B[39m \u001B[43mExcelFile\u001B[49m\u001B[43m(\u001B[49m\n\u001B[0;32m    496\u001B[0m \u001B[43m        \u001B[49m\u001B[43mio\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m    497\u001B[0m \u001B[43m        \u001B[49m\u001B[43mstorage_options\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mstorage_options\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m    498\u001B[0m \u001B[43m        \u001B[49m\u001B[43mengine\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mengine\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m    499\u001B[0m \u001B[43m        \u001B[49m\u001B[43mengine_kwargs\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mengine_kwargs\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m    500\u001B[0m \u001B[43m    \u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m    501\u001B[0m \u001B[38;5;28;01melif\u001B[39;00m engine \u001B[38;5;129;01mand\u001B[39;00m engine \u001B[38;5;241m!=\u001B[39m io\u001B[38;5;241m.\u001B[39mengine:\n\u001B[0;32m    502\u001B[0m     \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mValueError\u001B[39;00m(\n\u001B[0;32m    503\u001B[0m         \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mEngine should not be specified when passing \u001B[39m\u001B[38;5;124m\"\u001B[39m\n\u001B[0;32m    504\u001B[0m         \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124man ExcelFile - ExcelFile already has the engine set\u001B[39m\u001B[38;5;124m\"\u001B[39m\n\u001B[0;32m    505\u001B[0m     )\n",
      "File \u001B[1;32mD:\\PythonStudy\\my-pandas\\venv\\lib\\site-packages\\pandas\\io\\excel\\_base.py:1567\u001B[0m, in \u001B[0;36mExcelFile.__init__\u001B[1;34m(self, path_or_buffer, engine, storage_options, engine_kwargs)\u001B[0m\n\u001B[0;32m   1564\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mengine \u001B[38;5;241m=\u001B[39m engine\n\u001B[0;32m   1565\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mstorage_options \u001B[38;5;241m=\u001B[39m storage_options\n\u001B[1;32m-> 1567\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_reader \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_engines\u001B[49m\u001B[43m[\u001B[49m\u001B[43mengine\u001B[49m\u001B[43m]\u001B[49m\u001B[43m(\u001B[49m\n\u001B[0;32m   1568\u001B[0m \u001B[43m    \u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_io\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m   1569\u001B[0m \u001B[43m    \u001B[49m\u001B[43mstorage_options\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mstorage_options\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m   1570\u001B[0m \u001B[43m    \u001B[49m\u001B[43mengine_kwargs\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mengine_kwargs\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m   1571\u001B[0m \u001B[43m\u001B[49m\u001B[43m)\u001B[49m\n",
      "File \u001B[1;32mD:\\PythonStudy\\my-pandas\\venv\\lib\\site-packages\\pandas\\io\\excel\\_openpyxl.py:552\u001B[0m, in \u001B[0;36mOpenpyxlReader.__init__\u001B[1;34m(self, filepath_or_buffer, storage_options, engine_kwargs)\u001B[0m\n\u001B[0;32m    534\u001B[0m \u001B[38;5;129m@doc\u001B[39m(storage_options\u001B[38;5;241m=\u001B[39m_shared_docs[\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mstorage_options\u001B[39m\u001B[38;5;124m\"\u001B[39m])\n\u001B[0;32m    535\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m\u001B[38;5;250m \u001B[39m\u001B[38;5;21m__init__\u001B[39m(\n\u001B[0;32m    536\u001B[0m     \u001B[38;5;28mself\u001B[39m,\n\u001B[1;32m   (...)\u001B[0m\n\u001B[0;32m    539\u001B[0m     engine_kwargs: \u001B[38;5;28mdict\u001B[39m \u001B[38;5;241m|\u001B[39m \u001B[38;5;28;01mNone\u001B[39;00m \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mNone\u001B[39;00m,\n\u001B[0;32m    540\u001B[0m ) \u001B[38;5;241m-\u001B[39m\u001B[38;5;241m>\u001B[39m \u001B[38;5;28;01mNone\u001B[39;00m:\n\u001B[0;32m    541\u001B[0m \u001B[38;5;250m    \u001B[39m\u001B[38;5;124;03m\"\"\"\u001B[39;00m\n\u001B[0;32m    542\u001B[0m \u001B[38;5;124;03m    Reader using openpyxl engine.\u001B[39;00m\n\u001B[0;32m    543\u001B[0m \n\u001B[1;32m   (...)\u001B[0m\n\u001B[0;32m    550\u001B[0m \u001B[38;5;124;03m        Arbitrary keyword arguments passed to excel engine.\u001B[39;00m\n\u001B[0;32m    551\u001B[0m \u001B[38;5;124;03m    \"\"\"\u001B[39;00m\n\u001B[1;32m--> 552\u001B[0m     \u001B[43mimport_optional_dependency\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[38;5;124;43mopenpyxl\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[43m)\u001B[49m\n\u001B[0;32m    553\u001B[0m     \u001B[38;5;28msuper\u001B[39m()\u001B[38;5;241m.\u001B[39m\u001B[38;5;21m__init__\u001B[39m(\n\u001B[0;32m    554\u001B[0m         filepath_or_buffer,\n\u001B[0;32m    555\u001B[0m         storage_options\u001B[38;5;241m=\u001B[39mstorage_options,\n\u001B[0;32m    556\u001B[0m         engine_kwargs\u001B[38;5;241m=\u001B[39mengine_kwargs,\n\u001B[0;32m    557\u001B[0m     )\n",
      "File \u001B[1;32mD:\\PythonStudy\\my-pandas\\venv\\lib\\site-packages\\pandas\\compat\\_optional.py:138\u001B[0m, in \u001B[0;36mimport_optional_dependency\u001B[1;34m(name, extra, errors, min_version)\u001B[0m\n\u001B[0;32m    136\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m \u001B[38;5;167;01mImportError\u001B[39;00m:\n\u001B[0;32m    137\u001B[0m     \u001B[38;5;28;01mif\u001B[39;00m errors \u001B[38;5;241m==\u001B[39m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mraise\u001B[39m\u001B[38;5;124m\"\u001B[39m:\n\u001B[1;32m--> 138\u001B[0m         \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mImportError\u001B[39;00m(msg)\n\u001B[0;32m    139\u001B[0m     \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m\n\u001B[0;32m    141\u001B[0m \u001B[38;5;66;03m# Handle submodules: if we have submodule, grab parent module from sys.modules\u001B[39;00m\n",
      "\u001B[1;31mImportError\u001B[0m: Missing optional dependency 'openpyxl'.  Use pip or conda to install openpyxl."
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df = pd.read_excel('student_scores.xlsx')\n",
    "print(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "\n",
    "a1 = np.array([0, 1, 2])\n",
    "print(a1, type(a1))"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:47.559641600Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "a2 = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8]])\n",
    "print(a2, type(a2))"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:47.562640200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "a3 = np.array([[[0, 1, 2], [3, 4, 5], [6, 7, 8]], [[9, 10, 11], [12, 13, 14],\n",
    "                                                   [15, 16, 17]],\n",
    "               [[18, 19, 20], [21, 22, 23], [24, 25, 26]]])\n",
    "print(a3, type(a3))"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:47.565638400Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "n1 = np.zeros(10)  # 生成10个浮点型0\n",
    "n2 = np.ones(8)  # 生成8个浮点型1\n",
    "n3 = np.empty(9)  # 生成9个随机值，很夸张的随机值\n",
    "n4 = np.arange(10)  # 生成10个，不包括10，步长1,默认从0开始\n",
    "n5 = np.ones_like(n4)  # 生成与n4结构一样的1值\n",
    "n6 = np.zeros_like(n4)  # 生成与n4结构一样的0值\n",
    "n7 = np.empty_like(n4)  # 生成与n4结构一样的随机值\n",
    "print(n1)\n",
    "print(n2)\n",
    "print(n3)\n",
    "print(n4)\n",
    "print(n5)\n",
    "print(n6)\n",
    "print(n7)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:47.569636400Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "print(np.arange(2, 10, 0.5))  # 从2开始，步长0.5 ，不超过10\n",
    "print(np.linspace(3, 6, num=5, endpoint=True))  # 从3到6，生成均匀的5个值，包括最终值6\n",
    "print(np.random.randn(3, 4))  # 返回一个3行4列的随机数组，浮点型\n",
    "print(np.random.randint(0, 10, size=(3, 4)))  # 返回一个3行4列的随机数组，整型,随机值范围0到9\n",
    "print(np.random.randint(0, 10, size=(3, 3, 3)))  # 返回一个3维数组，整型,随机值范围0到9"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:47.572634400Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "\n",
    "a = np.array([1, 2, 3], dtype=np.int32)  # 32位整数\n",
    "b = np.array([255, 0], dtype=np.uint8)  # 8位无符号整数"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:47.575632600Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "print(a, b)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:47.578630800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "c = np.array([1.0, 2.5], dtype=np.float32)  # 单精度浮点\n",
    "d = np.array([3.1415926535], dtype=np.float64)  # 双精度浮点"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:47.581629600Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "print(c, d)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:47.584627500Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "e = np.array([1 + 2j, 3 + 4j], dtype=np.complex64)\n",
    "print(e)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:47.587626100Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "f = np.array([True, False], dtype=np.bool_)\n",
    "print(f)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:47.590624800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "g = np.array([\"hello\", \"numpy\"], dtype=np.str_)  # Unicode\n",
    "h = np.array([b\"abc\", b\"123\"], dtype=np.bytes_)  # 字节字符串"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:47.593622900Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "print(g, h)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:47.597620100Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "i = np.array([\"2023-01-01\", \"2024-12-31\"], dtype=\"datetime64[D]\")  # 天精度\n",
    "j = np.array([1, 7], dtype=\"timedelta64[D]\")  # 时间差（天）"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:47.600618500Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "print(i, j)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:47.602618200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "dt = np.dtype([(\"name\", \"U10\"), (\"age\", \"i4\"), (\"height\", \"f4\")])\n",
    "people = np.array([(\"Alice\", 25, 1.65), (\"Bob\", 30, 1.80)], dtype=dt)\n",
    "print(people[\"age\"])  # 访问字段"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:47.605615700Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "print(dt)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:47.608614600Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'people' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001B[1;31m---------------------------------------------------------------------------\u001B[0m",
      "\u001B[1;31mNameError\u001B[0m                                 Traceback (most recent call last)",
      "Cell \u001B[1;32mIn[4], line 1\u001B[0m\n\u001B[1;32m----> 1\u001B[0m \u001B[38;5;28mprint\u001B[39m(\u001B[43mpeople\u001B[49m)\n",
      "\u001B[1;31mNameError\u001B[0m: name 'people' is not defined"
     ]
    }
   ],
   "source": [
    "print(people)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:47.611612400Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# 创建三维数组\n",
    "a3 = np.array([[[0, 1, 2], [3, 4, 5], [6, 7, 8]], [[9, 10, 11], [12, 13, 14],\n",
    "                                                   [15, 16, 17]],\n",
    "               [[18, 19, 20], [21, 22, 23], [24, 25, 26]]])\n",
    "print(a3)\n",
    "print('数组的维度：', a3.ndim)\n",
    "print('数组的形状：', a3.shape)\n",
    "print('数据的长度：', a3.size)\n",
    "print('数组中的元素的类型：', a3.dtype)\n",
    "print('数组中的每个元素所占的字节：', a3.itemsize)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:47.615610300Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "a = np.arange(24)\n",
    "print(a)\n",
    "print(a.reshape(3, 2, -1))"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:47.619608Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "a4 = np.arange(12).reshape(3, 4)\n",
    "b4 = np.arange(12, 24).reshape(3, 4)\n",
    "print(\"a4\", a4)\n",
    "print(\"b4\", b4)\n",
    "print(\"垂直合并\", np.vstack((a4, b4)))\n",
    "print(\"水平合并\", np.hstack((a4, b4)))"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:47.622606200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "a4 = np.arange(12).reshape(3, 4)\n",
    "b4 = np.arange(12, 24).reshape(3, 4)\n",
    "print(\"a4\", a4)\n",
    "print(\"b4\", b4)\n",
    "print(\"第一轴合并：\", np.concatenate([a4, b4], axis=0))\n",
    "print(\"第二轴合并：\", np.concatenate([a4, b4], axis=1))"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:47.626603700Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "a5 = np.arange(36).reshape(2, 2, 9)\n",
    "print(\"a5\", a5)\n",
    "b5, c5, d5 = np.split(a5, [3, 6], axis=2)\n",
    "print(\"b5\", b5)\n",
    "print(\"c5\", c5)\n",
    "print(\"d5\", d5)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:47.630601200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "a = np.arange(10)\n",
    "print(a)\n",
    "print(a[2])\n",
    "print(a[-2])\n",
    "print(a[1:5])"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:47.634599200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "a2 = np.arange(12).reshape(3, 4)\n",
    "print(a2)\n",
    "print(a2[1, 2])\n",
    "print(a2[1:3, 2:4])"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:47.637598Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "a = np.array([10, 20, 30, 40])\n",
    "b = np.array([1, 2, 3, 4])\n",
    "print(a - 1)\n",
    "print(b * 10)\n",
    "print(a + b)\n",
    "print(a - b)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:47.642594700Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "a = np.array([10, 20, 30, 40])\n",
    "b = np.array([1, 2, 3, 4])\n",
    "print(a.max())\n",
    "print(a.min())\n",
    "print(a.sum())"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:47.647592100Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "s1 = pd.Series([2, 'b', 3.4, 6])\n",
    "print(s1)  # 打印Series\n",
    "print(s1.index)  # 打印索引\n",
    "print(s1.values)  # 打印值"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:47.652589Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "s2 = pd.Series([2, 'b', 3.4, 6], index=['a', 'b', 'c', 'd'])\n",
    "print(s2)  # 打印Series\n",
    "print(s2.index)  # 打印索引\n",
    "print(s2.values)  # 打印值"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:47.655587500Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "s3 = pd.Series({'语文': 90, '数学': 100, '英语': 66})\n",
    "print(s3)  # 打印Series\n",
    "print(s3.index)  # 打印索引\n",
    "print(s3.values)  # 打印值"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:47.660584100Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# 查询数据\n",
    "print(s2['c'], type(s2['c']))\n",
    "print(s2[['a', 'b', 'c']], type(s2[['a', 'b', 'c']]))"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:47.668579800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "d = {'学号': [1, 2, 3],\n",
    "     '姓名': ['张三', '李四', '王五'],\n",
    "     '语文': [99, 88, 77],\n",
    "     '数学': [44, 55, 55],\n",
    "     '英语': [88, 55, 66]}\n",
    "df = pd.DataFrame(d)\n",
    "print(df)\n",
    "print(df.dtypes)\n",
    "print(df.columns)  # 列索引\n",
    "print(df.index)  # 行索引"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:47.673576400Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "print(df['姓名'], type(df['姓名']))  # 查询一列，返回Series\n",
    "print(df['姓名'][1])"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:47.677574100Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "print(df[['姓名', '语文']], type(df[['姓名', '语文']]))  # 查询两列，返回DataFrame"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:47.683570600Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "print(df.loc[1], type(df.loc[1]))  # 查询一行,返回Series"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:47.686569Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "print(df.loc[1]['姓名'], df.loc[1]['语文'])"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:47.689566800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "print(df.loc[[0, 2]], type(df.loc[[0, 2]]))  # 查询多行，返回DataFrame"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:47.693565100Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "print(df.loc[1:2])  # 查询多行，支持切片"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:47.697562400Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# 导入pandas库，去别名pd\n",
    "import pandas as pd\n",
    "\n",
    "df = pd.read_excel('student_scores.xlsx')  # 读取excel，返回DataFrame数据集对象\n",
    "print(df)\n",
    "print(df.dtypes)  # 查看每列的数据类型"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:47.702560600Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "print(df['姓名'], type(df['姓名']))  # 获取姓名列，以及类型\n",
    "print(df['姓名'].dtype)  # 查询姓名数据类型\n",
    "print(df['语文分数'].dtype)  # 查询语文分数类型"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:47.705559600Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "print(df['语文分数'].dtype == np.int64)\n",
    "print(df['数学分数'].dtype == np.int64)\n",
    "print(pd.api.types.is_object_dtype(df['语文分数'].dtype))\n",
    "print(pd.api.types.is_object_dtype(df['数学分数'].dtype))"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:47.707558400Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df = pd.read_csv('student_scores.csv')"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:47.710556100Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "      name  age         city\n",
      "0    Alice   25     New York\n",
      "1      Bob   30  Los Angeles\n",
      "2  Charlie   35      Chicago\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df = pd.read_json('file.json')\n",
    "print(df)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-08-25T07:18:10.262664700Z",
     "start_time": "2025-08-25T07:17:47.715553Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "错误: 发生了未知错误: Missing optional dependency 'lxml'.  Use pip or conda to install lxml.\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "\n",
    "try:\n",
    "    # 读取 HTML 文件\n",
    "    dfs = pd.read_html('example.html')\n",
    "    # 打印每个表格的基本信息和内容\n",
    "    for i, df in enumerate(dfs):\n",
    "        print(f\"第 {i + 1} 个表格基本信息：\")\n",
    "        df.info()\n",
    "        rows, columns = df.shape\n",
    "        if rows < 10:\n",
    "            print(f\"第 {i + 1} 个表格全部内容信息：\")\n",
    "            print(df.to_csv(sep='\\t', na_rep='nan'))\n",
    "        else:\n",
    "            print(f\"第 {i + 1} 个表格前几行内容信息：\")\n",
    "            print(df.head().to_csv(sep='\\t', na_rep='nan'))\n",
    "except FileNotFoundError:\n",
    "    print(\"错误: 文件未找到，请检查文件路径。\")\n",
    "except ValueError:\n",
    "    print(\"错误: 文件中未找到有效的表格。\")\n",
    "except Exception as e:\n",
    "    print(f\"错误: 发生了未知错误: {e}\")"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-08-25T07:18:10.262664700Z",
     "start_time": "2025-08-25T07:17:47.721549900Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "outputs": [
    {
     "ename": "EmptyDataError",
     "evalue": "No columns to parse from file",
     "output_type": "error",
     "traceback": [
      "\u001B[1;31m---------------------------------------------------------------------------\u001B[0m",
      "\u001B[1;31mStopIteration\u001B[0m                             Traceback (most recent call last)",
      "File \u001B[1;32mD:\\PythonStudy\\my-pandas\\venv\\lib\\site-packages\\pandas\\io\\parsers\\python_parser.py:404\u001B[0m, in \u001B[0;36mPythonParser._infer_columns\u001B[1;34m(self)\u001B[0m\n\u001B[0;32m    403\u001B[0m \u001B[38;5;28;01mtry\u001B[39;00m:\n\u001B[1;32m--> 404\u001B[0m     line \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_buffered_line\u001B[49m\u001B[43m(\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m    406\u001B[0m     \u001B[38;5;28;01mwhile\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mline_pos \u001B[38;5;241m<\u001B[39m\u001B[38;5;241m=\u001B[39m hr:\n",
      "File \u001B[1;32mD:\\PythonStudy\\my-pandas\\venv\\lib\\site-packages\\pandas\\io\\parsers\\python_parser.py:637\u001B[0m, in \u001B[0;36mPythonParser._buffered_line\u001B[1;34m(self)\u001B[0m\n\u001B[0;32m    636\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m--> 637\u001B[0m     \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_next_line\u001B[49m\u001B[43m(\u001B[49m\u001B[43m)\u001B[49m\n",
      "File \u001B[1;32mD:\\PythonStudy\\my-pandas\\venv\\lib\\site-packages\\pandas\\io\\parsers\\python_parser.py:738\u001B[0m, in \u001B[0;36mPythonParser._next_line\u001B[1;34m(self)\u001B[0m\n\u001B[0;32m    737\u001B[0m \u001B[38;5;28;01mwhile\u001B[39;00m \u001B[38;5;28;01mTrue\u001B[39;00m:\n\u001B[1;32m--> 738\u001B[0m     orig_line \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_next_iter_line\u001B[49m\u001B[43m(\u001B[49m\u001B[43mrow_num\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mpos\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;241;43m+\u001B[39;49m\u001B[43m \u001B[49m\u001B[38;5;241;43m1\u001B[39;49m\u001B[43m)\u001B[49m\n\u001B[0;32m    739\u001B[0m     \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mpos \u001B[38;5;241m+\u001B[39m\u001B[38;5;241m=\u001B[39m \u001B[38;5;241m1\u001B[39m\n",
      "File \u001B[1;32mD:\\PythonStudy\\my-pandas\\venv\\lib\\site-packages\\pandas\\io\\parsers\\python_parser.py:805\u001B[0m, in \u001B[0;36mPythonParser._next_iter_line\u001B[1;34m(self, row_num)\u001B[0m\n\u001B[0;32m    804\u001B[0m \u001B[38;5;28;01massert\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mdata \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m\n\u001B[1;32m--> 805\u001B[0m line \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;43mnext\u001B[39;49m\u001B[43m(\u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mdata\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m    806\u001B[0m \u001B[38;5;66;03m# for mypy\u001B[39;00m\n",
      "\u001B[1;31mStopIteration\u001B[0m: ",
      "\nThe above exception was the direct cause of the following exception:\n",
      "\u001B[1;31mEmptyDataError\u001B[0m                            Traceback (most recent call last)",
      "Cell \u001B[1;32mIn[7], line 3\u001B[0m\n\u001B[0;32m      1\u001B[0m \u001B[38;5;28;01mimport\u001B[39;00m\u001B[38;5;250m \u001B[39m\u001B[38;5;21;01mpandas\u001B[39;00m\u001B[38;5;250m \u001B[39m\u001B[38;5;28;01mas\u001B[39;00m\u001B[38;5;250m \u001B[39m\u001B[38;5;21;01mpd\u001B[39;00m\n\u001B[1;32m----> 3\u001B[0m df \u001B[38;5;241m=\u001B[39m \u001B[43mpd\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mread_clipboard\u001B[49m\u001B[43m(\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m      4\u001B[0m \u001B[38;5;28mprint\u001B[39m(df)\n",
      "File \u001B[1;32mD:\\PythonStudy\\my-pandas\\venv\\lib\\site-packages\\pandas\\io\\clipboards.py:129\u001B[0m, in \u001B[0;36mread_clipboard\u001B[1;34m(sep, dtype_backend, **kwargs)\u001B[0m\n\u001B[0;32m    123\u001B[0m \u001B[38;5;28;01melif\u001B[39;00m \u001B[38;5;28mlen\u001B[39m(sep) \u001B[38;5;241m>\u001B[39m \u001B[38;5;241m1\u001B[39m \u001B[38;5;129;01mand\u001B[39;00m kwargs\u001B[38;5;241m.\u001B[39mget(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mengine\u001B[39m\u001B[38;5;124m\"\u001B[39m) \u001B[38;5;241m==\u001B[39m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mc\u001B[39m\u001B[38;5;124m\"\u001B[39m:\n\u001B[0;32m    124\u001B[0m     warnings\u001B[38;5;241m.\u001B[39mwarn(\n\u001B[0;32m    125\u001B[0m         \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mread_clipboard with regex separator does not work properly with c engine.\u001B[39m\u001B[38;5;124m\"\u001B[39m,\n\u001B[0;32m    126\u001B[0m         stacklevel\u001B[38;5;241m=\u001B[39mfind_stack_level(),\n\u001B[0;32m    127\u001B[0m     )\n\u001B[1;32m--> 129\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m read_csv(StringIO(text), sep\u001B[38;5;241m=\u001B[39msep, dtype_backend\u001B[38;5;241m=\u001B[39mdtype_backend, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs)\n",
      "File \u001B[1;32mD:\\PythonStudy\\my-pandas\\venv\\lib\\site-packages\\pandas\\io\\parsers\\readers.py:1026\u001B[0m, in \u001B[0;36mread_csv\u001B[1;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)\u001B[0m\n\u001B[0;32m   1013\u001B[0m kwds_defaults \u001B[38;5;241m=\u001B[39m _refine_defaults_read(\n\u001B[0;32m   1014\u001B[0m     dialect,\n\u001B[0;32m   1015\u001B[0m     delimiter,\n\u001B[1;32m   (...)\u001B[0m\n\u001B[0;32m   1022\u001B[0m     dtype_backend\u001B[38;5;241m=\u001B[39mdtype_backend,\n\u001B[0;32m   1023\u001B[0m )\n\u001B[0;32m   1024\u001B[0m kwds\u001B[38;5;241m.\u001B[39mupdate(kwds_defaults)\n\u001B[1;32m-> 1026\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[43m_read\u001B[49m\u001B[43m(\u001B[49m\u001B[43mfilepath_or_buffer\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mkwds\u001B[49m\u001B[43m)\u001B[49m\n",
      "File \u001B[1;32mD:\\PythonStudy\\my-pandas\\venv\\lib\\site-packages\\pandas\\io\\parsers\\readers.py:620\u001B[0m, in \u001B[0;36m_read\u001B[1;34m(filepath_or_buffer, kwds)\u001B[0m\n\u001B[0;32m    617\u001B[0m _validate_names(kwds\u001B[38;5;241m.\u001B[39mget(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mnames\u001B[39m\u001B[38;5;124m\"\u001B[39m, \u001B[38;5;28;01mNone\u001B[39;00m))\n\u001B[0;32m    619\u001B[0m \u001B[38;5;66;03m# Create the parser.\u001B[39;00m\n\u001B[1;32m--> 620\u001B[0m parser \u001B[38;5;241m=\u001B[39m TextFileReader(filepath_or_buffer, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwds)\n\u001B[0;32m    622\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m chunksize \u001B[38;5;129;01mor\u001B[39;00m iterator:\n\u001B[0;32m    623\u001B[0m     \u001B[38;5;28;01mreturn\u001B[39;00m parser\n",
      "File \u001B[1;32mD:\\PythonStudy\\my-pandas\\venv\\lib\\site-packages\\pandas\\io\\parsers\\readers.py:1620\u001B[0m, in \u001B[0;36mTextFileReader.__init__\u001B[1;34m(self, f, engine, **kwds)\u001B[0m\n\u001B[0;32m   1617\u001B[0m     \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39moptions[\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mhas_index_names\u001B[39m\u001B[38;5;124m\"\u001B[39m] \u001B[38;5;241m=\u001B[39m kwds[\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mhas_index_names\u001B[39m\u001B[38;5;124m\"\u001B[39m]\n\u001B[0;32m   1619\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mhandles: IOHandles \u001B[38;5;241m|\u001B[39m \u001B[38;5;28;01mNone\u001B[39;00m \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mNone\u001B[39;00m\n\u001B[1;32m-> 1620\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_engine \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_make_engine\u001B[49m\u001B[43m(\u001B[49m\u001B[43mf\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mengine\u001B[49m\u001B[43m)\u001B[49m\n",
      "File \u001B[1;32mD:\\PythonStudy\\my-pandas\\venv\\lib\\site-packages\\pandas\\io\\parsers\\readers.py:1898\u001B[0m, in \u001B[0;36mTextFileReader._make_engine\u001B[1;34m(self, f, engine)\u001B[0m\n\u001B[0;32m   1895\u001B[0m     \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mValueError\u001B[39;00m(msg)\n\u001B[0;32m   1897\u001B[0m \u001B[38;5;28;01mtry\u001B[39;00m:\n\u001B[1;32m-> 1898\u001B[0m     \u001B[38;5;28;01mreturn\u001B[39;00m mapping[engine](f, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39m\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39moptions)\n\u001B[0;32m   1899\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m \u001B[38;5;167;01mException\u001B[39;00m:\n\u001B[0;32m   1900\u001B[0m     \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mhandles \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m:\n",
      "File \u001B[1;32mD:\\PythonStudy\\my-pandas\\venv\\lib\\site-packages\\pandas\\io\\parsers\\python_parser.py:133\u001B[0m, in \u001B[0;36mPythonParser.__init__\u001B[1;34m(self, f, **kwds)\u001B[0m\n\u001B[0;32m    127\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_col_indices: \u001B[38;5;28mlist\u001B[39m[\u001B[38;5;28mint\u001B[39m] \u001B[38;5;241m|\u001B[39m \u001B[38;5;28;01mNone\u001B[39;00m \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mNone\u001B[39;00m\n\u001B[0;32m    128\u001B[0m columns: \u001B[38;5;28mlist\u001B[39m[\u001B[38;5;28mlist\u001B[39m[Scalar \u001B[38;5;241m|\u001B[39m \u001B[38;5;28;01mNone\u001B[39;00m]]\n\u001B[0;32m    129\u001B[0m (\n\u001B[0;32m    130\u001B[0m     columns,\n\u001B[0;32m    131\u001B[0m     \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mnum_original_columns,\n\u001B[0;32m    132\u001B[0m     \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39munnamed_cols,\n\u001B[1;32m--> 133\u001B[0m ) \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_infer_columns\u001B[49m\u001B[43m(\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m    135\u001B[0m \u001B[38;5;66;03m# Now self.columns has the set of columns that we will process.\u001B[39;00m\n\u001B[0;32m    136\u001B[0m \u001B[38;5;66;03m# The original set is stored in self.original_columns.\u001B[39;00m\n\u001B[0;32m    137\u001B[0m \u001B[38;5;66;03m# error: Cannot determine type of 'index_names'\u001B[39;00m\n\u001B[0;32m    138\u001B[0m (\n\u001B[0;32m    139\u001B[0m     \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mcolumns,\n\u001B[0;32m    140\u001B[0m     \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mindex_names,\n\u001B[1;32m   (...)\u001B[0m\n\u001B[0;32m    145\u001B[0m     \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mindex_names,  \u001B[38;5;66;03m# type: ignore[has-type]\u001B[39;00m\n\u001B[0;32m    146\u001B[0m )\n",
      "File \u001B[1;32mD:\\PythonStudy\\my-pandas\\venv\\lib\\site-packages\\pandas\\io\\parsers\\python_parser.py:432\u001B[0m, in \u001B[0;36mPythonParser._infer_columns\u001B[1;34m(self)\u001B[0m\n\u001B[0;32m    429\u001B[0m         \u001B[38;5;28;01mreturn\u001B[39;00m columns, num_original_columns, unnamed_cols\n\u001B[0;32m    431\u001B[0m     \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mnames:\n\u001B[1;32m--> 432\u001B[0m         \u001B[38;5;28;01mraise\u001B[39;00m EmptyDataError(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mNo columns to parse from file\u001B[39m\u001B[38;5;124m\"\u001B[39m) \u001B[38;5;28;01mfrom\u001B[39;00m\u001B[38;5;250m \u001B[39m\u001B[38;5;21;01merr\u001B[39;00m\n\u001B[0;32m    434\u001B[0m     line \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mnames[:]\n\u001B[0;32m    436\u001B[0m this_columns: \u001B[38;5;28mlist\u001B[39m[Scalar \u001B[38;5;241m|\u001B[39m \u001B[38;5;28;01mNone\u001B[39;00m] \u001B[38;5;241m=\u001B[39m []\n",
      "\u001B[1;31mEmptyDataError\u001B[0m: No columns to parse from file"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df = pd.read_clipboard()\n",
    "print(df)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:47.727546100Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# 没有列头的csv文件处理 header设置None，names指定列名称\n",
    "df = pd.read_csv('student_scores2.csv', header=None,\n",
    "                 names=['姓名', '学号', '出生日期', '性别', '语文分数', '数学分数', '英语分数'])"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:47.950418200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:47.953417100Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df = pd.read_csv('student_scores.csv', index_col='学号')"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:47.957414100Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:47.961411500Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df.index"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:47.965409500Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df = pd.read_csv('student_scores.csv', usecols=['学号', '语文分数'])"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:47.968407500Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:47.972405400Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df = pd.read_csv('student_scores.csv', nrows=10)\n",
    "df"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:47.976402700Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df = pd.read_csv('student_scores.csv', keep_default_na=True, na_values=['X', ''])"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:47.981400200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:47.984398300Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "d = {'学号': [1, 2, 3],\n",
    "     '姓名': ['张三', '李四', '王五'],\n",
    "     '语文': [99, 88, 77],\n",
    "     '数学': [44, 55, 55],\n",
    "     '英语': [88, 55, 66]}\n",
    "df = pd.DataFrame(d)\n",
    "print(df)\n",
    "df.to_csv('out/student.csv')"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:47.989395400Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df.to_csv('out/student2.csv', index=False)  # 不需要行索引"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:47.994392800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df.to_csv('out/student3.csv', index=False, header=False)  # 不需要行索引,不要表头"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:47.999389500Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df.to_csv('out/student4.csv', index=False, columns=['姓名', '语文'])  # 不需要行索引,不要表头"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.003387300Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "d = {'学号': [1, 2, 3],\n",
    "     '姓名': ['张三', '李四', '王五'],\n",
    "     '语文': [99, 88, 77],\n",
    "     '数学': [44, 55, 55],\n",
    "     '英语': [88, 55, 66]}\n",
    "df = pd.DataFrame(d)\n",
    "df.to_excel('out/student.xlsx', sheet_name=\"我的标签页\", index=False)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.007385600Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# 导入pandas库，去别名pd\n",
    "import pandas as pd\n",
    "\n",
    "df = pd.read_csv('student_scores.csv')\n",
    "print(df.index, type(df.index))\n",
    "print(df.columns, type(df.columns))"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.011382800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "df = pd.DataFrame(np.arange(1, 16).reshape(5, 3))"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.015380500Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.018378400Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "df = pd.DataFrame(np.arange(1, 16).reshape(5, 3), index=list('abcde'), columns=list('xyz'))"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.023375900Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.032371300Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df = pd.read_csv('student_scores.csv', index_col='学号')\n",
    "df"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.036368600Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df = pd.read_csv('student_scores.csv')\n",
    "df.set_index('姓名')"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.041366100Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df = pd.read_csv('student_scores.csv', index_col='学号')\n",
    "print('索引对象', df.index)\n",
    "print('索引名称', df.index.name)\n",
    "print('索引array数组', df.index.array)\n",
    "print('索引数据类型', df.index.dtype)\n",
    "print('索引形状', df.index.shape)\n",
    "print('索引元素长度', df.index.size)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.045363400Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df = pd.read_csv('student_scores.csv', index_col='学号')  # 指定索引\n",
    "df.reset_index()  # 重置索引"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.051360300Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# 导入pandas库，取别名pd\n",
    "import pandas as pd\n",
    "\n",
    "df = pd.read_excel('student_scores.xlsx')  # 读取excel，返回DataFrame数据集对象\n",
    "print('行索引对象：', df.index)\n",
    "print('列索引对象：', df.columns)\n",
    "print('维度：', df.ndim)\n",
    "print('数据量大小 2维的话 就是行*列：', df.size)\n",
    "print('判断是否空：', df.empty)\n",
    "print('列名：', df.keys())\n",
    "xuehao = df.get('学号')\n",
    "print(xuehao, type(xuehao))\n",
    "print('列名：', xuehao.name)\n",
    "print('列数据：', xuehao.array)\n",
    "print('类型：', xuehao.dtype)\n",
    "print('是否有空值：', xuehao.hasnans)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.055357900Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df.set_index('姓名').mean(axis=1, numeric_only=True).head(3)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.060354800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "s1 = pd.Series([5, 7, 1, 6, 9, 2])\n",
    "s1.diff()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.064352600Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# 导入pandas库，去别名pd\n",
    "import pandas as pd\n",
    "\n",
    "df = pd.read_csv('student_scores.csv')\n",
    "df.loc[:10, '语文分数':'数学分数'].diff()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.068349800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df.loc[:10, '语文分数':'数学分数'].diff(1, axis=1)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.074346300Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# 导入pandas库，去别名pd\n",
    "import pandas as pd\n",
    "\n",
    "df = pd.read_csv('student_scores.csv')\n",
    "df.shift(2)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.079344200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# 导入pandas库，去别名pd\n",
    "import pandas as pd\n",
    "\n",
    "df = pd.read_csv('student_scores.csv')\n",
    "df['语文排名'] = df['语文分数'].rank(method='min', ascending=False)\n",
    "df['语文排名'] = df['语文排名'].astype(int)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.085339700Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.091336300Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df = pd.read_csv('student_scores.csv')\n",
    "df.姓名\n",
    "df['学号']"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.096334700Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df.姓名"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.101331400Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df['学号']"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.106328200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df = pd.read_csv('student_scores.csv')\n",
    "df[1:5:1]  # 获取第2行到第5行数据，默认步长1\n",
    "df[:5]  # 前5行数据\n",
    "df[:]  # 获取所有数据\n",
    "df[1:5:2]  # 步长2\n",
    "df[::-1]  # 反转顺序"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.110325400Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df = pd.read_csv('student_scores.csv')\n",
    "df[['姓名', '语文分数']]"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.115323200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df = pd.read_csv('student_scores.csv')\n",
    "df.loc[1]  # 返回索引标签是1的行 返回类型是Series\n",
    "df.loc[[2, 4, 8]]  # 返回索引标签是2，4,8的行\n",
    "df.loc[:]  # 查询所有\n",
    "df.loc[2:10]  # 查询索引标签2到10的行，包括10"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.119322700Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df.set_index('姓名').loc['钱mv']"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.123318Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df.loc[2:10, ['姓名', '语文分数']]"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.128315Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df.loc[2:10, '学号':]  # 获取学号后面所有列"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.132313200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df = pd.read_csv('student_scores.csv')\n",
    "df.iloc[:5]  # 前5行\n",
    "df.iloc[:]  # 所有数据\n",
    "df.iloc[2:10:2]  # 2到10，步长2\n",
    "df.iloc[2:10, [1, 3]]  # 2到10，第1和第3列\n",
    "df.iloc[2:10, 2:]  # 2到10，第2列及后面"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.137311Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df = pd.read_csv('student_scores.csv')\n",
    "df.at[2, '学号']\n",
    "df.姓名.at[2]\n",
    "df.loc[2].at['姓名']\n",
    "df.set_index('学号').at['S80742', '姓名']\n",
    "df.set_index('学号').姓名.at['S80742']"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.142307800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df = pd.read_csv('student_scores.csv')\n",
    "df.iat[2, 0]\n",
    "df.loc[2].iat[0]"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.146304900Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df = pd.read_csv('student_scores.csv')\n",
    "df.get('姓名', '没找到')"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.151302600Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df.get('姓名2', '没找到')"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.156299700Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df.set_index('学号').姓名.get('S80742', '没找到')"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.160297500Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df = pd.read_csv('student_scores.csv')\n",
    "df.truncate(2, 5)  # 截取第2到第5行 默认行方向截取"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.164295700Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df.姓名.truncate(2, 5)  # Series也可以截取"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.170291500Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df.set_index('姓名').语文分数 >= 60"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.174290Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df.set_index('姓名').loc[:, ['语文分数', '数学分数', '英语分数']] >= 60"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.178287500Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "~(df.数学分数 < 60) & (df['性别'] == '男')"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.185283Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df[df['语文分数'] >= 60]  # 查询语文及格的人"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.189280900Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df[~(df['语文分数'] >= 60)]  #查询语文不及格的人"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.193279400Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df[df.语文分数 > df.数学分数]  # 查询语文分数高于数学分数的人"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.199275700Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df.loc[df['语文分数'] >= 60, '语文分数']  # 查询语文及格的人，只显示索引名称和语文分数列"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.203272200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df.query('(语文分数>90)and(性别==\"男\")')  # 语文分数90分以上的男生"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.207270300Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "avg = df.语文分数.mean()  # 求平均分\n",
    "df.query('语文分数>@avg')  # 语文分数超过平均分的人"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.211267900Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df.filter(items=['语文分数', '数学分数'])  # 选择列"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.216265800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df.filter(regex='孙', axis=0)  # 所有孙姓数据"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.221262500Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df.filter(regex='吴', axis=0)  # 所有孙姓数据"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.225260300Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.231259Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df.filter(regex='孙', axis=0)  # 所有孙姓数据"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.235254100Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df.filter(like='分数')  # 列名包含分数的列"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.239251700Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "print(df.index)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.244248900Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df.set_index('姓名')"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.248247Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df.filter(regex='孙', axis=0)  # 所有孙姓数据"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.252245800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df.reindex()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.257242200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df.filter(regex='孙', axis=0)  # 所有孙姓数据"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.263238200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df[df['姓名'].str.contains('孙')]"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.268235300Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df.set_index('姓名')\n",
    "print(df.index)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.272233Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.277230Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df.set_index('姓名')"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.283227200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df = pd.DataFrame({\n",
    "    'A': ['1', '2', '3'],\n",
    "    'B': [4, 5, 6],\n",
    "    'C': ['2025-01-01', '2025-02-01', '2025-03-01']\n",
    "})\n",
    "print(df.dtypes)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.287225100Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df['A'] = df['A'].astype(int)  # 转换为整数\n",
    "df['B'] = df['B'].astype(float)  # 转换为浮点数"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.294220700Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df.dtypes"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.298217800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "s = pd.Series(['1', '2', 'apple', '4'])\n",
    "s_numeric = pd.to_numeric(s, errors='coerce')  # 无效值转为 NaN"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.303214600Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "s_numeric"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.307212500Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "s_ignore = pd.to_numeric(s, errors='ignore')  # 保留原数据"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.312210Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "s = pd.Series([1.0, 2.0, 3.0])\n",
    "s = pd.to_numeric(s, downcast='integer')  # 转换为最小整数类型 (int8/int16/...)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.317206800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "s"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.321205Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "s.dtypes"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.325202400Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df = pd.DataFrame({\n",
    "    'A': ['1', '2', '3'],\n",
    "    'B': [4, 5, 6],\n",
    "    'C': ['2025-01-01', '2025-02-01', 'ccc']\n",
    "})\n",
    "df['C'] = pd.to_datetime(df['C'], errors='coerce')  # 转换为 datetime 类型"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.330199600Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.334197400Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df.dtypes"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.337196800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df = pd.DataFrame({\n",
    "    'A': ['1', '2', '3'],\n",
    "    'B': [4, 5, 6],\n",
    "    'C': ['2025-01-01', '2025-02-01', 'ccc']\n",
    "})\n",
    "df['D'] = ['10', None, '30']\n",
    "df['D'] = df['D'].fillna(0).astype(int)  # 填充 NaN 后转为整数"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.342192800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.347189500Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df = pd.DataFrame({\n",
    "    'A': ['1', '2', '3'],\n",
    "    'B': [4, 5, 6]\n",
    "})\n",
    "df = df.convert_dtypes()  # 自动推断更合适的数据类型"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.351189100Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.358183400Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df.dtypes"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.364180Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "data = {\n",
    "    'Name': ['Alice', 'Bob', 'Charlie', 'David'],\n",
    "    'Age': [25, 30, 22, 28],\n",
    "    'Score': [85, 95, 75, 88]\n",
    "}\n",
    "df = pd.DataFrame(data)\n",
    "# 单列升序排序\n",
    "df_sorted = df.sort_values(by='Age')"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.368177500Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.372175Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df_sorted"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.376172700Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# 多列排序：先按 Score 降序，再按 Age 升序\n",
    "df_sorted = df.sort_values(by=['Score', 'Age'], ascending=[False, True])"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.381170400Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df_sorted"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.384169600Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "data = {\n",
    "    'Name': ['Alice', 'Bob', 'Charlie', 'David'],\n",
    "    'Age': [25, 30, 22, 28],\n",
    "    'Score': [85, 95, 75, 88]\n",
    "}\n",
    "df = pd.DataFrame(data)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.389165300Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.393163300Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df_sorted_index = df.sort_index(ascending=False)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.398161Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df_sorted_index"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.404157200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df_sorted_columns = df.sort_index(axis=1)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.409154300Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df_sorted_columns"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.414151800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df_with_nan = pd.DataFrame({'A': [2, None, 1]})"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.422147300Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df_with_nan"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.426144800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df_sorted = df_with_nan.sort_values(by='A', na_position='first')"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.432141100Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df_sorted"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.437138200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "data = {\n",
    "    'Name': ['Alice', 'Bob', 'Charlie', 'David'],\n",
    "    'Age': [25, 30, 22, 28],\n",
    "    'Score': [85, 95, 75, 88]\n",
    "}\n",
    "df = pd.DataFrame(data)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.444134500Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.457128800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df_sorted = df.sort_values(by='Age')"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.466123200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df_sorted"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.473118200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df_sorted.reset_index(drop=True)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.482112Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df = pd.read_excel('student_scores.xlsx')  # 读取excel，返回DataFrame数据集对象"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.487109100Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.491107100Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df.iloc[0, 0] = '老六'"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.497103900Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.501101Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df.loc[df.语文分数 < 60, '语文分数'] = 60"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.505098700Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.509096700Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "data = {\n",
    "    'Name': ['Alice', 'Bob', 'Charlie', 'David'],\n",
    "    'Age': [25, 30, 22, 28],\n",
    "    'Score': [85, 95, 75, 88]\n",
    "}\n",
    "df = pd.DataFrame(data)\n",
    "df"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.514094300Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "v = [100, 100, 100, 100]\n",
    "df.Score = v"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.518091600Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.523088500Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "# 示例数据\n",
    "df = pd.DataFrame({\n",
    "    'A': [1, 2, 3, 4],\n",
    "    'B': ['a', 'b', 'c', 'd'],\n",
    "    'C': [0, 1, np.nan, 1]\n",
    "})"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.527086600Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.533084100Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df.replace(0, -1)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.538080100Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.541078Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df.replace([0, 1], [-1, -2])"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.547075300Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df.replace({'A': {1: 100}, 'C': {0: -1}})"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.552072500Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "# 示例数据\n",
    "df = pd.DataFrame({\n",
    "    'A': [1, 2, 3, 4],\n",
    "    'B': ['a', 'b', 'c', 'd'],\n",
    "    'C': [0, 1, np.nan, 1]\n",
    "})"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.556069400Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df.replace(np.nan, 0)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.559067800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "# 示例数据\n",
    "df = pd.DataFrame({\n",
    "    'A': [1, 2, 3, 4],\n",
    "    'B': ['a', 'b', 'c', 'd'],\n",
    "    'C': [0, 1, np.nan, 1]\n",
    "})\n",
    "# 多层字典结构：不同列使用不同规则\n",
    "df.replace({\n",
    "    'A': {1: 100, 2: 200},  # A列：1→100，2→200\n",
    "    'B': {'a': 'Alpha'}  # B列：a→Alpha\n",
    "})"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.564064900Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.567063100Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df = pd.DataFrame({\n",
    "    'A': [1, 2, 3, 4],\n",
    "    'B': ['a', 'b', 'c', 'd'],\n",
    "    'C': [0, 1, np.nan, 1]\n",
    "})"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.572060300Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.575058900Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df.replace({\n",
    "    0: 'Zero',  # 全局将0替换为'Zero'\n",
    "    'a': 'Alpha'  # 全局将'a'替换为'Alpha'\n",
    "})"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.579057500Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "# 示例数据\n",
    "df = pd.DataFrame({\n",
    "    'A': [1, np.nan, 3, np.nan],\n",
    "    'B': ['x', np.nan, 'y', np.nan],\n",
    "    'C': [np.nan, 5, np.nan, 7]\n",
    "})"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.582055Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.585052500Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df.fillna(0)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.587051400Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.590049800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df.fillna({'A': 0, 'B': 'missing'})"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.593048700Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.596046900Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df.fillna(method='ffill')"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.600044400Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df = pd.read_excel('student_scores.xlsx')  # 读取excel，返回DataFrame数据集对象\n",
    "df['总分'] = df.语文分数 + df.数学分数 + df.英语分数"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.603042700Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.606040700Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df = pd.read_excel('student_scores.xlsx')  # 读取excel，返回DataFrame数据集对象\n",
    "df.insert(4, '总分', df.语文分数 + df.数学分数 + df.英语分数)  # 在第5列插入总分字段"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.609040200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.613037Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df = pd.read_excel('student_scores.xlsx')  # 读取excel，返回DataFrame数据集对象\n",
    "df_new = df.assign(总分=df.语文分数 + df.数学分数 + df.英语分数)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.616035300Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df_new"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.618034100Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df = pd.read_excel('student_scores.xlsx')  # 读取excel，返回DataFrame数据集对象\n",
    "df.eval('总分=语文分数 + 数学分数 + 英语分数')"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.621032600Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df = pd.read_excel('student_scores.xlsx')  # 读取excel，返回DataFrame数据集对象\n",
    "df.loc[100] = ['老六', 'S1111', '1999-11-11', '男', 10, 10, 10]"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.624030700Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.627029100Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df.loc[101] = {'姓名': '老七', '学号': 'S11122', '性别': '男'}"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.631026800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.634024900Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df.loc[len(df)] = {'姓名': '老八', '学号': 'S11123', '性别': '女'}"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.638022700Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.641021300Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# 示例数据\n",
    "df1 = pd.DataFrame({'A': [1, 2], 'B': ['x', 'y']})\n",
    "df2 = pd.DataFrame({'A': [3, 4], 'B': ['z', 'w']})\n",
    "# 沿行方向合并（默认axis=0）\n",
    "result = pd.concat([df1, df2], ignore_index=True)\n",
    "print(result)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.644019100Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df3 = pd.DataFrame({'C': [10, 20], 'D': [True, False]})"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.647018100Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df3"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.651015400Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df1"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.654015800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "result = pd.concat([df1, df3], axis=1)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.657011500Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "result"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.660009800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# 创建示例 DataFrame\n",
    "df = pd.DataFrame({\n",
    "    'A': [1, 2, 3],\n",
    "    'B': ['x', 'y', 'z'],\n",
    "    'C': [10, 20, 30]\n",
    "})"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.664009100Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.668005200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "popped_column = df.pop('B')"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.674004500Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.677001900Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "print(popped_column)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.680000700Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "popped_column"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.682997900Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# 创建示例 DataFrame\n",
    "df = pd.DataFrame({\n",
    "    'A': [1, 2, 3],\n",
    "    'B': ['x', 'y', 'z'],\n",
    "    'C': [10, 20, 30]\n",
    "})"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.685997Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.687994900Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df_dropped = df.drop(columns='B')"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.689993300Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df_dropped"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.691992100Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df_dropped = df.drop(labels='B', axis=1)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.693991300Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df_dropped"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.696988900Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.698987700Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df_dropped = df.drop(labels='B', axis=1)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.701986300Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df_dropped"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.705983900Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df_dropped = df.drop(labels='B', axis=1)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.707982500Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df_dropped = df.drop(index=0)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.709981800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df_dropped"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.711980400Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.713978800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df_dropped = df.drop(labels=0, axis=0)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.715977700Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df_dropped"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.717977Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "data = {\n",
    "    'A': [1, np.nan, 3, 4],\n",
    "    'B': [np.nan, 2, np.nan, 5],\n",
    "    'C': [7, 8, 9, 10]\n",
    "}\n",
    "df = pd.DataFrame(data)\n",
    "print(\"原始数据：\")\n",
    "print(df)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.720975Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# 删除所有含有NaN的行\n",
    "df_cleaned = df.dropna()\n",
    "print(\"删除含有NaN的行：\")\n",
    "print(df_cleaned)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.722973800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.724972500Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# 添加一行全为NaN的数据\n",
    "df.loc[4] = [np.nan, np.nan, np.nan]"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.727970800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.729969800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# 删除全为NaN的行\n",
    "df_cleaned = df.dropna(how='all')\n",
    "print(\"删除全为NaN的行：\")\n",
    "print(df_cleaned)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.731968400Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# 保留至少有2个非NaN值的行\n",
    "df_cleaned = df.dropna(thresh=2)\n",
    "print(\"保留至少2个非NaN值的行：\")\n",
    "print(df_cleaned)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.733967400Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# 只在列A和B中检查缺失值，列C的NaN不影响结果\n",
    "df_cleaned = df.dropna(subset=['A', 'B'])\n",
    "print(\"仅检查A和B列的缺失值：\")\n",
    "print(df_cleaned)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.736965800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df = pd.read_excel('student_scores.xlsx')  # 读取excel，返回DataFrame数据集对象\n",
    "df = df.select_dtypes(include='number')  # 只保留数据类型的列"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.738964600Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.741962900Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df = pd.read_excel('student_scores.xlsx')  # 读取excel，返回DataFrame数据集对象\n",
    "df = df.select_dtypes(include='number')\n",
    "df.where(df >= 60)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.743961700Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df.where(lambda d: d.数学分数 > 90)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.746959900Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df.where(df >= 60, '不及格')"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.748958900Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df = pd.read_excel('student_scores.xlsx')  # 读取excel，返回DataFrame数据集对象\n",
    "df = df.select_dtypes(include='number')\n",
    "df.mask(df >= 60)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.750957800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df = pd.read_excel('student_scores.xlsx')  # 读取excel，返回DataFrame数据集对象\n",
    "for name in df.姓名:\n",
    "    print(name)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.751957200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df = pd.read_excel('student_scores.xlsx')  # 读取excel，返回DataFrame数据集对象\n",
    "for name in df.姓名:\n",
    "    print(name)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.753957400Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df = pd.read_excel('student_scores.xlsx')  # 读取excel，返回DataFrame数据集对象\n",
    "for i, n, m in zip(df.index, df.姓名, df.数学分数):\n",
    "    print(i, n, m)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.755956400Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# 创建示例 DataFrame\n",
    "data = {'A': [1, 2, 3], 'B': ['x', 'y', 'z']}\n",
    "df = pd.DataFrame(data)\n",
    "# 遍历每一行\n",
    "for idx, row in df.iterrows():\n",
    "    print(f\"Index: {idx}, A: {row['A']}, B: {row['B']}\")"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.757955300Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df = pd.read_excel('student_scores.xlsx')  # 读取excel，返回DataFrame数据集对象\n",
    "for row in df.itertuples():\n",
    "    print(row)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.759953800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df = pd.read_excel('student_scores.xlsx')  # 读取excel，返回DataFrame数据集对象\n",
    "for row in df.itertuples(index=False):\n",
    "    print(row)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.761953100Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df = pd.read_excel('student_scores.xlsx')  # 读取excel，返回DataFrame数据集对象\n",
    "for row in df.itertuples():\n",
    "    print(row.Index, row.姓名)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.763951900Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df = pd.read_excel('student_scores.xlsx')  # 读取excel，返回DataFrame数据集对象\n",
    "for row in df.itertuples():\n",
    "    if row.语文分数 > 90 and row.数学分数 > 80:\n",
    "        print(row.姓名, row.语文分数, row.数学分数)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.766949900Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df = pd.read_excel('student_scores.xlsx')  # 读取excel，返回DataFrame数据集对象\n",
    "for col_name, col_data in df.items():\n",
    "    print(col_name)\n",
    "    print('----')\n",
    "    print(col_data)\n",
    "    print('=========================')"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.768948300Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df = pd.read_excel('student_scores.xlsx')  # 读取excel，返回DataFrame数据集对象\n",
    "for col_name, col_data in df.items():\n",
    "    if col_name == '语文分数' or col_name == '数学分数' or col_name == '英语分数':\n",
    "        print(col_name, '平均分', col_data.mean(), '最高分', col_data.max(), '最低分', col_data.min())"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.770947600Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "df = pd.DataFrame({'A': [1, 2], 'B': [3, 4]})\n",
    "df.apply(np.sum)  # 输出：A:3, B:7"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.771946600Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df.apply(lambda row: row['A'] + row['B'], axis=1)  # 输出行求和：4, 6"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.774945Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# 创建一个简单的 DataFrame\n",
    "data = {'A': [1, None, 3], 'B': [None, 5, 6]}\n",
    "df = pd.DataFrame(data)\n",
    "\n",
    "\n",
    "# 自定义函数\n",
    "def remove_na(df):\n",
    "    return df.fillna(0)\n",
    "\n",
    "\n",
    "# 自定义一个函数\n",
    "def add_column(df):\n",
    "    df['C'] = df['A'] + df['B']\n",
    "    return df"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.776942900Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "remove_na(df)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.778943900Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.780942500Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# 使用 pipe() 方法\n",
    "df_new = df.pipe(remove_na).pipe(add_column)\n",
    "print(df_new)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.782940700Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df = pd.DataFrame({'A': [1, 2], 'B': [3, 4]})\n",
    "df.map(lambda x: x * 2)  # 所有元素翻倍"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.784939600Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "s = pd.Series([1, 2, 3])\n",
    "s.map(lambda x: x * 2)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.786938300Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "data = {\n",
    "    'Price': [100, 150, 80, 200],\n",
    "    'Quantity': [3, 2, 5, 4]\n",
    "}\n",
    "df = pd.DataFrame(data)\n",
    "# 对 'Price' 列计算均值、最大值和极差\n",
    "price_stats = df['Price'].agg(['mean', 'max', lambda x: x.max() - x.min()])\n",
    "print(price_stats)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.789936400Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.791936200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# 对 'Price' 求和和均值，对 'Quantity' 求最大值\n",
    "result = df.agg({\n",
    "    'Price': ['sum', 'mean'],\n",
    "    'Quantity': 'max'\n",
    "})\n",
    "print(result)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.794935800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "data = {\n",
    "    'Region': ['North', 'South', 'North', 'South'],\n",
    "    'Sales': [200, 150, 300, 250],\n",
    "    'Profit': [50, 30, 80, 40]\n",
    "}\n",
    "df = pd.DataFrame(data)\n",
    "# 按 'Region' 分组后聚合\n",
    "grouped = df.groupby('Region').agg({\n",
    "    'Sales': ['sum', 'mean'],\n",
    "    'Profit': lambda x: x.mean() / x.std()\n",
    "})\n",
    "print(grouped)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.797930900Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# 创建示例数据\n",
    "df = pd.DataFrame({\n",
    "    'Group': ['A', 'A', 'B', 'B', 'B'],\n",
    "    'Value': [1, 2, 3, 4, 5]\n",
    "})"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.800929500Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.802928200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# 计算每组的和，并广播到每个元素\n",
    "df['Group_Sum'] = df.groupby('Group')['Value'].transform('sum')"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.804927Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.807925200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# 创建示例数据\n",
    "df = pd.DataFrame({\n",
    "    'Group': ['A', 'A', 'B', 'B', 'B'],\n",
    "    'Value': [1, 2, 3, 4, 5]\n",
    "})\n",
    "\n",
    "\n",
    "def demean(x):\n",
    "    return x - x.mean()\n",
    "\n",
    "\n",
    "# 计算每组的均值，并从每个元素中减去\n",
    "df['Demeaned'] = df.groupby('Group')['Value'].transform(demean)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.809924100Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.812923700Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# 原始数据\n",
    "df = pd.DataFrame({'A': [1, 2], 'B': [3, 4]})\n",
    "# 深拷贝（独立数据）\n",
    "df_deep = df.copy(deep=True)\n",
    "df_deep['A'][0] = 100  # 仅修改副本\n",
    "print(df)  # 原数据不变\n",
    "# 浅拷贝（共享数据）\n",
    "df_shallow = df.copy(deep=False)\n",
    "df_shallow['A'][0] = 100  # 修改会影响原数据！\n",
    "print(df)  # 原数据被修改"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.815920900Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df1 = pd.DataFrame({'A': [1, 2], 'B': [3, 4]})\n",
    "df2 = pd.DataFrame({'A': [5, 6], 'B': [7, 8]})\n",
    "result = pd.concat([df1, df2])"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.818919200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "result"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.821917700Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "result = pd.concat([df1, df2], axis=1)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.823916300Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "result"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.827914100Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "result = pd.concat([df1, df2], ignore_index=True)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.830912300Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "result"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.833910800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df3 = pd.DataFrame({'A': [5, 6], 'C': [9, 10]})\n",
    "result = pd.concat([df1, df3], join='inner')  # 仅保留共有列'A'"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.836909600Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "result"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.839907300Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df1 = pd.DataFrame({\n",
    "    'id': [1, 2, 3],\n",
    "    'name': ['Alice', 'Bob', 'Charlie']\n",
    "})\n",
    "df2 = pd.DataFrame({\n",
    "    'id': [2, 3, 4],\n",
    "    'age': [25, 30, 28]\n",
    "})\n",
    "# 按 'id' 列合并（默认 inner join）\n",
    "result = pd.merge(df1, df2, on='id')"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.842905400Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "result"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.845903800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "result = pd.merge(df1, df2, on='id', how='left')"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.848902100Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "result"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.850903700Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "result = pd.merge(df1, df2, on='id', how='outer')"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.852901600Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "result"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.855898400Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df3 = pd.DataFrame({\n",
    "    'user_id': [2, 3, 4],\n",
    "    'score': [90, 85, 88]\n",
    "})\n",
    "# 合并 df1 的 'id' 和 df3 的 'user_id'\n",
    "result = pd.merge(df1, df3, left_on='id', right_on='user_id')"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.857899400Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "result"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.859897100Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df4 = pd.DataFrame({\n",
    "    'id': [2, 3],\n",
    "    'name': ['Bob', 'Charlie'],\n",
    "    'department': ['HR', 'Tech']\n",
    "})\n",
    "result = pd.merge(df1, df4, on='id', suffixes=('_old', '_new'))"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.864892900Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "result"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.866891600Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df5 = pd.DataFrame({\n",
    "    'id': [1, 2, 2],\n",
    "    'year': [2022, 2023, 2023],\n",
    "    'sales': [100, 200, 150]\n",
    "})\n",
    "df6 = pd.DataFrame({\n",
    "    'id': [1, 2, 2],\n",
    "    'year': [2022, 2023, 2023],\n",
    "    'profit': [20, 40, 30]\n",
    "})\n",
    "result = pd.merge(df5, df6, on=['id', 'year'])"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.869889800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "result"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.871889300Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df5"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.873887500Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df6"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.875886300Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df1 = pd.DataFrame({'A': [1, 2, 3]}, index=['a', 'b', 'c'])\n",
    "df2 = pd.DataFrame({'B': [4, 5, 6]}, index=['b', 'c', 'd'])\n",
    "result = df1.join(df2)  # 默认左连接，保留 df1 所有索引"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.878886100Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "result"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.880884900Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "result = df1.join(df2, how='outer')"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.882882300Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "result"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.883882700Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "result = df1.join(df2, how='inner')"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.885881900Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "result"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.887880900Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df3 = pd.DataFrame({'A': [7, 8, 9]}, index=['b', 'c', 'd'])\n",
    "result = df1.join(df3, lsuffix='_left', rsuffix='_right')"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.889878200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "result"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.891877200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df4 = pd.DataFrame({'key': ['a', 'b', 'c'], 'C': [10, 11, 12]})\n",
    "df5 = pd.DataFrame({'key': ['b', 'c', 'd'], 'D': [13, 14, 15]})\n",
    "# 将 df4 的 'key' 列作为合并键，与 df5 的索引对齐\n",
    "result = df4.join(df5.set_index('key'), on='key')"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.894876200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "result"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.897873700Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df6 = pd.DataFrame({'E': [16, 17, 18]}, index=['a', 'b', 'c'])\n",
    "df7 = pd.DataFrame({'F': [19, 20, 21]}, index=['b', 'c', 'd'])\n",
    "result = df1.join([df6, df7], how='outer')"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.899872800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "result"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.901871400Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "df1 = pd.DataFrame({'A': [1, np.nan, 3], 'B': [np.nan, 5, 6]}, index=[0, 1, 2])\n",
    "df2 = pd.DataFrame({'A': [10, 20, 30], 'B': [40, 50, 60]}, index=[1, 2, 3])\n",
    "result = df1.combine_first(df2)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.904871100Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df1"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.907867800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df2"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.909866700Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df1 = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, index=[0, 1, 2])\n",
    "df2 = pd.DataFrame({'A': [10, 20, 30], 'B': [40, 50, 60]}, index=[1, 2, 3])"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.911865600Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df1"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.913864Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df2"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.915863300Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "result = df1.combine(df2, lambda x, y: y)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.917861900Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df1 = pd.DataFrame({\n",
    "    'A': [1, 2, 3],\n",
    "    'B': [4, 5, 6]\n",
    "})\n",
    "df2 = pd.DataFrame({\n",
    "    'A': [3, 2, 1],\n",
    "    'B': [6, 5, 4]\n",
    "})\n",
    "\n",
    "\n",
    "def diff(x, y):\n",
    "    return x - y"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.918861400Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df2"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.920860200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df1"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.922859100Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df1 - df2"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.925857800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df1 = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, index=[0, 1, 2])\n",
    "df2 = pd.DataFrame({'A': [10, 20, 30], 'B': [40, 50, 60]}, index=[1, 2, 3])"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.928858100Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df1"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.930856600Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df2"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.932855200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "result = df1.combine(df2, lambda x, y: x + y, fill_value=0)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.935852700Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "result"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.937850900Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, index=[0, 1, 2])\n",
    "other = pd.DataFrame({'A': [10, 20], 'B': [40, 50]}, index=[1, 2])\n",
    "df.update(other)  # 默认 overwrite=True\n",
    "print(df)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.939849100Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df.combine(other, lambda x, y: y)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.941848300Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df.update(other)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.944848700Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.946845200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "df = pd.DataFrame({'A': [1, np.nan, 3], 'B': [np.nan, 5, 6]})\n",
    "other = pd.DataFrame({'A': [10, 20, 30], 'B': [40, 50, 60]})\n",
    "df.update(other, overwrite=False)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.948843900Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.950843Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "df1 = pd.DataFrame({'A': [1, 2], 'B': [3, 4]}, index=[0, 1])\n",
    "df2 = pd.DataFrame({'A': [1, 2], 'B': [3, 4]}, index=[0, 1])\n",
    "print(df1.equals(df2))  # True"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.952841700Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df5 = pd.DataFrame({'B': [3, 4], 'A': [1, 2]}, index=[0, 1])  # 列顺序不同\n",
    "df6 = pd.DataFrame({'A': [1, 2], 'B': [3, 4]}, index=[1, 0])  # 索引顺序不同"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.954840500Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df5"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.957839100Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df6"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.958838500Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df1"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.961839300Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df1.equals(df5)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.964835600Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df1.equals(df6)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.967833200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df7 = pd.DataFrame({'A': [1, 2], 'B': [3, 4]}, dtype=int)\n",
    "df8 = pd.DataFrame({'A': [1, 2], 'B': [3, 4]}, dtype=float)\n",
    "print(df7.equals(df8))  # False（数据类型不同）"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.970831900Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df1 = pd.DataFrame({\"A\": [1, 2, 3], \"B\": [4, 5, 6]})\n",
    "df2 = pd.DataFrame({\"A\": [1, 20, 3], \"B\": [4, 50, 60]})\n",
    "diff = df1.compare(df2)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.973830100Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "diff"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.976828400Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df1"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.979826800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df2"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.981825300Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df1 = pd.DataFrame({\"A\": [1, 2, 3], \"B\": [4, 5, 6]})\n",
    "df2 = pd.DataFrame({\"A\": [1, 20, 3], \"B\": [4, 50, 60]})\n",
    "diff = df1.compare(df2, align_axis=0)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.984823500Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df1"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.988823700Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df2"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.990821200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "diff"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.992818400Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "diff2 = df1.compare(df2)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.995817300Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "diff2"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.997816Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df1 = pd.DataFrame({\"A\": [1, 2, 3], \"B\": [4, 5, 6]})\n",
    "df2 = pd.DataFrame({\"A\": [1, 20, 3], \"B\": [4, 50, 60]})\n",
    "diff = df1.compare(df2, keep_shape=True)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:48.999814800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "diff"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:49.001815200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df1 = pd.DataFrame({\"A\": [1, 2, 3], \"B\": [4, 5, 6]})\n",
    "df2 = pd.DataFrame({\"A\": [1, 20, 3], \"B\": [4, 50, 60]})\n",
    "diff = df1.compare(df2, keep_equal=True)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:49.004812500Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "diff"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:49.007810400Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df1"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:49.010808600Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df2"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:49.012807500Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "diff = df1.compare(df2, keep_shape=True)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:49.014806100Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "diff"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:49.016805300Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "df = pd.DataFrame({\n",
    "    'A': [1, np.nan, 3],\n",
    "    'B': ['x', None, 'z'],\n",
    "    'C': [pd.NaT, pd.Timestamp('2023'), None]\n",
    "})\n",
    "# 检测所有缺失值\n",
    "print(df.isna())"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:49.019803600Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "print(df.isna().sum())"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:49.021802400Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:49.023801200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "print(df.isna())"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:49.026799100Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "print(df.isna().sum())"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:49.028798700Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "data = {\n",
    "    'A': [1, np.nan, 3, 4],\n",
    "    'B': [np.nan, 5, None, 7],\n",
    "    'C': ['x', 'y', np.nan, 'z']\n",
    "}\n",
    "df = pd.DataFrame(data)\n",
    "print(\"原始数据：\")\n",
    "print(df)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:49.031797400Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df.dropna(thresh=2)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:49.033795500Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "data = {\n",
    "    'ID': [101, 102, 103, 101, 102],\n",
    "    'Name': ['Alice', 'Bob', 'Charlie', 'Alice', 'Bob'],\n",
    "    'Score': [85, 90, 95, 85, 90]\n",
    "}\n",
    "df = pd.DataFrame(data)\n",
    "print(\"原始数据：\")\n",
    "print(df)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:49.036794300Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df.drop_duplicates()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:49.038792900Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# 创建 DataFrame\n",
    "df = pd.DataFrame({'A': [1, 2], 'B': [3, 4]}, index=['row1', 'row2'])\n",
    "print(\"原始数据：\")\n",
    "print(df)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:49.039792200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "stacked = df.stack()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:49.043789800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "print(stacked)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:49.047787400Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "columns = pd.MultiIndex.from_tuples([('A', 'X'), ('A', 'Y'), ('B', 'X')])"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:49.050785800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "columns"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:49.053783800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df_multi = pd.DataFrame([[1, 2, 3], [4, 5, 6]], index=['row1', 'row2'], columns=columns)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:49.055782700Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "df_multi"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:49.058782200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "stacked_inner = df_multi.stack()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:49.060779800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "stacked_inner"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "start_time": "2025-08-25T07:17:49.062779600Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "outputs": [
    {
     "data": {
      "text/plain": "      name  age         city\n0    Alice   25     New York\n1      Bob   30  Los Angeles\n2  Charlie   35      Chicago",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>name</th>\n      <th>age</th>\n      <th>city</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>Alice</td>\n      <td>25</td>\n      <td>New York</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>Bob</td>\n      <td>30</td>\n      <td>Los Angeles</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>Charlie</td>\n      <td>35</td>\n      <td>Chicago</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-08-25T07:18:49.187862200Z",
     "start_time": "2025-08-25T07:18:49.034243900Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [],
   "metadata": {
    "collapsed": false
   }
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
